-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[mlir][xegpu] Add definitions of MatrixDescType and related ops. #153273
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-gpu Author: Chao Chen (chencha3) Changes: This PR adds the definition of MatrixDesc, a type representing a block of data stored in shared local memory, and related load/store operations for Intel Xe GPUs. Full diff: https://github.com/llvm/llvm-project/pull/153273.diff 7 Files Affected:
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 1a6a34c8d775a..f536650e9d872 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1101,4 +1101,110 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
let hasCanonicalizer = 1;
}
+def isSharedPred : CPred<"isSharedMemory(llvm::cast<mlir::MemRefType>($_self))">;
+class StaticShared1DMemRefOf<list<Type> allowedTypes> :
+ ConfinedType<MemRefRankOf<allowedTypes, [1]>, [HasStaticShapePred, isSharedPred],
+ "statically shaped " # MemRefOf<allowedTypes>.summary # " for shared memory",
+ "mlir::MemRefType">;
+
+class SizeInBits<string name> :
+ StrFunc<"llvm::cast<mlir::ShapedType>($" # name # ".getType()).getNumElements()"
+ "*llvm::cast<mlir::ShapedType>($" # name # ".getType()).getElementTypeBitWidth()">;
+class AllMemSizesMatch<list<string> names> :
+ AllMatchSameOperatorTrait<names, SizeInBits<"_self">.result,
+ "size in bits">;
+
+def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure,
+ AllMemSizesMatch<["source", "matrix_desc"]>]> {
+ let summary = "Create a matrix descriptor.";
+ let description = [{
+ Creates a matrix descriptor from a shared local memory (SLM) buffer.
+ The resulting matrix descriptor has to have the same size as the underlying
+ shared local memory.
+
+ Arguments:
+ - `source` : a 1D statically shaped memref with element type i8, representing the raw SLM buffer.
+ Results:
+ - `matrix_desc` : the matrix descriptor.
+ }];
+ let arguments = (ins StaticShared1DMemRefOf<[I8]>:$source);
+ let results = (outs XeGPU_MatrixDesc:$matrix_desc);
+ let assemblyFormat = "$source prop-dict attr-dict `` `:` type($source) `->` qualified(type($matrix_desc))";
+}
+
+def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
+ AllElementTypesMatch<["matrix_desc", "res"]>,
+ AllRanksMatch<["matrix_desc", "res"]>]> {
+ let arguments = (ins XeGPU_MatrixDesc:$matrix_desc,
+ Variadic<Index>: $offsets,
+ DenseI64ArrayAttr: $const_offsets,
+ OptionalAttr<LayoutTrait>:$layout
+ );
+ let results = (outs XeGPU_ValueType:$res);
+ let assemblyFormat = [{
+ $matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets)
+ prop-dict attr-dict `` `:` type(operands) `->` type(results)
+ }];
+
+ let description = [{
+ This operation reads a block of data from shared local memory (SLM)
+ using the provided matrix descriptor.
+
+ Arguments:
+ - `matrix_desc`: the matrix descriptor identifying the SLM region.
+ - `offsets`: the coordinates within the matrix to read from.
+ Results:
+ - `res`: the matrix elements loaded from SLM.
+ }];
+
+ let builders = [
+ OpBuilder<(ins "Type":$res, "TypedValue<MatrixDescType>": $matrix_desc,
+ "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutTrait": $layout)>,
+ ];
+ let extraClassDeclaration = [{
+ SmallVector<OpFoldResult> getMixedOffsets() {
+ return getMixedValues(getConstOffsets(), getOffsets(), getContext());
+ }
+ }];
+
+ let hasVerifier = 1;
+}
+
+def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
+ AllElementTypesMatch<["matrix_desc", "data"]>,
+ AllRanksMatch<["matrix_desc", "data"]>]> {
+ let arguments = (ins
+ XeGPU_MatrixDesc:$matrix_desc,
+ Variadic<Index>: $offsets,
+ DenseI64ArrayAttr: $const_offsets,
+ XeGPU_ValueType:$data,
+ OptionalAttr<LayoutTrait>:$layout
+ );
+ let assemblyFormat = [{
+ $matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets) `,` $data
+ prop-dict attr-dict `:` type(operands)
+ }];
+ let description = [{
+ This operation writes the `data` fragment into the shared local memory region
+ identified by `matrix_desc`.
+
+ Arguments:
+ - `matrix_desc`: the matrix descriptor specifying the SLM region.
+ - `offsets`: the coordinates within the matrix where the data will be written.
+ - `data`: the values to be stored in the matrix.
+ }];
+ let builders = [
+ OpBuilder<(ins "TypedValue<MatrixDescType>": $matrix_desc, "llvm::ArrayRef<OpFoldResult>": $offsets,
+ "Value" : $data, "LayoutTrait": $layout)>,
+ ];
+ let extraClassDeclaration = [{
+ SmallVector<OpFoldResult> getMixedOffsets() {
+ return getMixedValues(getConstOffsets(), getOffsets(), getContext());
+ }
+ }];
+
+ let hasVerifier = 1;
+}
+
+
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index b268cabb5d266..02cabce82398b 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -201,4 +201,26 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> {
}];
}
+def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInterface], "mlir::Type"> {
+ let summary = "MatrixDesc describing the data in SLM";
+ let description = [{
+ MatrixDesc represents a block of data stored in shared local memory.
+ By default, unless a layout attribute is provided, the data is stored
+ contiguously in row-major order within the region.
+ }];
+ let parameters = (ins ArrayRefParameter<"int64_t">: $shape,
+ "mlir::Type": $elementType,
+ OptionalParameter<"mlir::Attribute">: $layout);
+
+ let extraClassDeclaration = [{
+ bool hasRank() const { return true; }
+
+ MatrixDescType cloneWith(std::optional<llvm::ArrayRef<int64_t>> shape, Type elementType) const {
+ return MatrixDescType::get(getContext(), shape.value_or(getShape()), elementType, getLayout());
+ }
+ }];
+
+ let hasCustomAssemblyFormat = true;
+}
+
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
index 7c6a4f37db9af..603fb5d237544 100644
--- a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
@@ -17,6 +17,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect
MLIRAffineUtils
MLIRArithUtils
MLIRDialectUtils
+ MLIRGPUDialect
MLIRIR
MLIRViewLikeInterface
MLIRVectorDialect
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index d997296a22c20..ac9e994d4872c 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -591,6 +591,62 @@ LogicalResult TensorDescType::verify(
return success();
}
+//===----------------------------------------------------------------------===//
+// XeGPU_MatrixDescType
+//===----------------------------------------------------------------------===//
+mlir::Type MatrixDescType::parse(::mlir::AsmParser &parser) {
+ llvm::SmallVector<int64_t> shape;
+ mlir::Type elementType;
+ mlir::FailureOr<mlir::Attribute> layout;
+
+ // Parse literal '<'
+ if (parser.parseLess())
+ return {};
+
+ auto shapeLoc = parser.getCurrentLocation();
+ if (mlir::failed(parser.parseDimensionList(shape, false, true))) {
+ parser.emitError(shapeLoc, "failed to parse parameter 'shape'");
+ return {};
+ }
+
+ auto elemTypeLoc = parser.getCurrentLocation();
+ if (mlir::failed(parser.parseType(elementType))) {
+ parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'");
+ return {};
+ }
+
+ // parse optional attributes
+ if (mlir::succeeded(parser.parseOptionalComma())) {
+ mlir::Attribute attr;
+ ParseResult res = parser.parseAttribute(attr);
+ if (mlir::failed(res))
+ return {};
+ layout = attr;
+ }
+
+ // Parse literal '>'
+ if (parser.parseGreater())
+ return {};
+
+ MLIRContext *ctxt = parser.getContext();
+ return MatrixDescType::getChecked(
+ [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape,
+ elementType, layout.value_or(mlir::Attribute()));
+}
+
+void MatrixDescType::print(::mlir::AsmPrinter &printer) const {
+ printer << "<";
+
+ printer.printDimensionList(getShape());
+ printer << 'x';
+ printer << getElementType();
+
+ if (auto layout = getLayout())
+ printer << ", " << layout;
+
+ printer << ">";
+}
+
} // namespace xegpu
} // namespace mlir
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 2cd086feb5deb..2051d7030340e 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
@@ -21,6 +22,15 @@
namespace mlir {
namespace xegpu {
+bool isSharedMemory(const MemRefType &memrefTy) {
+ Attribute attr = memrefTy.getMemorySpace();
+ if (auto intAttr = llvm::dyn_cast<IntegerAttr>(attr))
+ return intAttr.getInt() == 3;
+ if (auto memrefSpace = llvm::dyn_cast<MemorySpaceAttr>(attr))
+ return memrefSpace.getValue() == MemorySpace::SLM;
+ return gpu::GPUDialect::isWorkgroupMemoryAddressSpace(attr);
+}
+
template <typename T>
static std::string makeString(T array, bool breakline = false) {
std::string buf;
@@ -925,6 +935,59 @@ void ConvertLayoutOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
patterns.add<FoldConvertLayoutOp>(context);
}
+//===----------------------------------------------------------------------===//
+// XeGPU_LoadMatrixOp
+//===----------------------------------------------------------------------===//
+void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res,
+ TypedValue<MatrixDescType> matrixDesc,
+ llvm::ArrayRef<OpFoldResult> offsets,
+ LayoutTrait layout) {
+ llvm::SmallVector<Value> dynamicOffsets;
+ llvm::SmallVector<int64_t> staticOffsets;
+
+ dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
+ auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
+
+ build(builder, state, res, matrixDesc, dynamicOffsets, staticOffsetsAttr,
+ layout);
+}
+
+LogicalResult LoadMatrixOp::verify() {
+ ArrayRef<int64_t> valueShape = getRes().getType().getShape();
+ ArrayRef<int64_t> mdescShape = getMatrixDesc().getType().getShape();
+ if (llvm::any_of(llvm::zip_equal(valueShape, mdescShape),
+ [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
+ return emitOpError("result shape must not exceed matrix desc shape.");
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_StoreMatrixOp
+//===----------------------------------------------------------------------===//
+void StoreMatrixOp::build(OpBuilder &builder, OperationState &state,
+ TypedValue<MatrixDescType> matrixDesc,
+ llvm::ArrayRef<OpFoldResult> offsets, Value data,
+ LayoutTrait layout) {
+ llvm::SmallVector<Value> dynamicOffsets;
+ llvm::SmallVector<int64_t> staticOffsets;
+
+ dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
+ auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
+
+ build(builder, state, matrixDesc, dynamicOffsets, staticOffsetsAttr, data,
+ layout);
+}
+
+LogicalResult StoreMatrixOp::verify() {
+ ArrayRef<int64_t> dataShape = getData().getType().getShape();
+ ArrayRef<int64_t> mdescShape = getMatrixDesc().getType().getShape();
+ if (llvm::any_of(llvm::zip_equal(dataShape, mdescShape),
+ [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
+ return emitOpError("data shape must not exceed matrix desc shape.");
+
+ return success();
+}
+
} // namespace xegpu
} // namespace mlir
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 44e15dd7cbb38..2feb010d343a8 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -762,3 +762,47 @@ func.func @slice_attr_repeat_dim() {
return
}
+// -----
+func.func @create_matrix_desc_non_slm() {
+ %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 1>
+ // expected-error@+1 {{operand #0 must be statically shaped memref of 8-bit signless integer values for shared memory}}
+ %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 1> -> !xegpu.matrix_desc<16x64xf16>
+ return
+}
+
+// -----
+func.func @create_matrix_desc_mismatch_sizes() {
+ %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
+ // expected-error@+1 {{failed to verify that all of {source, matrix_desc} have same size in bits}}
+ %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x32xf16>
+ return
+}
+
+// -----
+func.func @load_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // expected-error@+1 {{failed to verify that all of {matrix_desc, res} have same element type}}
+ %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf32>
+ return
+}
+
+// -----
+func.func @load_matrix_desc_invalid_result_size(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // expected-error@+1 {{result shape must not exceed matrix desc shape}}
+ %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<32x16xf16>
+ return
+}
+
+// -----
+func.func @store_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf32>) {
+ // expected-error@+1 {{failed to verify that all of {matrix_desc, data} have same element type}}
+ xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf32>
+ return
+}
+
+// -----
+func.func @store_matrix_desc_invalid_data_size(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<32x32xf16>) {
+ // expected-error@+1 {{data shape must not exceed matrix desc shape}}
+ xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<32x32xf16>
+ return
+}
+
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index 67c00f5a9cc2f..cda8f0ac1bb40 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -751,4 +751,51 @@ gpu.func @fence() {
gpu.return
}
+// CHECK-LABEL: gpu.func @create_matrix_desc({{.*}}) {
+gpu.func @create_matrix_desc() {
+ //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3>
+ //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16>
+ %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
+ %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16>
+ gpu.return
+}
+
+// CHECK-LABEL: gpu.func @create_matrix_desc_with_stride({{.*}}) {
+gpu.func @create_matrix_desc_with_stride() {
+ //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3>
+ //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>
+ %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
+ %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>
+ gpu.return
+}
+
+// CHECK: gpu.func @load_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>)
+gpu.func @load_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf16>
+ %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf16>
+ gpu.return
+}
+
+// CHECK: gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>)
+gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) {
+ // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> vector<8x16xf16>
+ %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> vector<8x16xf16>
+ gpu.return
+}
+
+
+// CHECK: gpu.func @store_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>)
+gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf16>) {
+ // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16>
+ xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16>
+ gpu.return
+}
+
+// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>)
+gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, %arg1: vector<16x16xf16>) {
+ // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16>
+ xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16>
+ gpu.return
+}
+
}
|
mlir/test/Dialect/XeGPU/ops.mlir
Outdated
@@ -751,4 +751,51 @@ gpu.func @fence() { | |||
gpu.return | |||
} | |||
|
|||
// CHECK-LABEL: gpu.func @create_matrix_desc({{.*}}) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a test with layout attribute?
Without layout attribute, matrix_desc is not different from a memref created by memref.view
It appears to me, what sets matrix_desc apart from memref is the optional layout attribute.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, the distribution layout has been moved to the load/store/subview ops. The data layout is kept, and we have a test case for it. Yeah I agree with you that matrix_desc is similar to memref now on Xe2, except matrix_desc is binding to slm only.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also cc @akroviakov |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The PR description lacks some details regarding how these ops play into the distribution passes, especially since these ops seem to prioritize usage above WI-level, judging by numerous tests with 2D payload.
Will the distribution perform a plain offset and payload shape manipulation?
What about the lowering? Does XeVM or SPIRV beneath it offer an efficient way to execute this op? If so, would the lowering be able to derive all of the needed built-in/intrinsic arguments without any named attributes?
bool isSharedMemory(const MemRefType &memrefTy) { | ||
Attribute attr = memrefTy.getMemorySpace(); | ||
if (auto intAttr = llvm::dyn_cast<IntegerAttr>(attr)) | ||
return intAttr.getInt() == 3; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
static_cast<int>(xevm::AddrSpace::SHARED)
seems more appropriate.
https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td#L329
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added support for this.
LogicalResult LoadMatrixOp::verify() { | ||
ArrayRef<int64_t> valueShape = getRes().getType().getShape(); | ||
ArrayRef<int64_t> mdescShape = getMatrixDesc().getType().getShape(); | ||
if (llvm::any_of(llvm::zip_equal(valueShape, mdescShape), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does AllShapesMatch
in .td
definition not suit this purpose?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Per latest definition, it can load or store a smaller shape of data from a bigger MatrixDesc. So AllShapesMatch doesn't fit here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, sorry, missed the >
in lambda.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no worries
The MatrixDescType doesn't have distribution layout. Instead, each op (load_matrix/store_matrix) has an optional layout attribute for this purpose. The doc is updated to reveal this. |
let summary = "MatrixDesc describing the data in SLM"; | ||
let description = [{ | ||
MatrixDesc represents a block of data stored in shared local memory. | ||
By default, unless a layout attribute is provided, the data is stored |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this layout?
I assume this is not distribution layout as you said it is not part of matrix desc type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the tests, there is usage like
!xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>
Is strided<[1,16]>
the layout attribute?
Is that attribute type not some that can be represented by
MemRefLayoutAttrInterface
used in MemRefType ?
https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/IR/BuiltinTypes.td#L796-#L801
Looks very similar to MemRefType. For example,
memref<12x4xf32, strided<[4, 1], offset: 5>>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please refer to intel/mlir-extensions#1092 for the motivation and explanation of the slm memory layout of matrix descriptor.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 to the questions here. It just reads as a MemRef layout which is fine but could use explicit clarification as layout
term becomes quite overloaded within the dialect now.
Could you add more description here? Or at least an example snippet.
mlir/test/Dialect/XeGPU/ops.mlir
Outdated
// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>) | ||
gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, %arg1: vector<16x16xf16>) { | ||
// CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16> | ||
xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This store out-of-bound place?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe keep vector<16x16xf16> as vector<8x16xf16>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good catch, fixed.
mlir/test/Dialect/XeGPU/ops.mlir
Outdated
// CHECK: gpu.func @store_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>) | ||
gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf16>) { | ||
// CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16> | ||
xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we want to use the format below to be consistent with xegpu.store
xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
mlir/test/Dialect/XeGPU/ops.mlir
Outdated
// CHECK: gpu.func @matrix_desc_subview([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>) | ||
gpu.func @matrix_desc_subview(%arg0: !xegpu.matrix_desc<16x64xf16>) { | ||
//CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16> | ||
%data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
!xegpu.matrix_desc<8x16xf16> needs to have strides=[64, 1]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
mlir/test/Dialect/XeGPU/invalid.mlir
Outdated
// ----- | ||
func.func @matrix_desc_subview_element_type_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) { | ||
// expected-error@+1 {{failed to verify that all of {src, res} have same element type}} | ||
%data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf32> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this one also have an additional error, the strides need to be carried in the subview.
xegpu.matrix_desc<8x16xf32> => xegpu.matrix_desc<8x16xf32, strides=[64, 1]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
}]; | ||
let parameters = (ins ArrayRefParameter<"int64_t">: $shape, | ||
"mlir::Type": $elementType, | ||
OptionalParameter<"mlir::Attribute">: $layout); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
consider using mem_layout instead of layout, to differentiate with XeGPU.layout which describes the mapping between sg/lane ids to the data.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
mlir/test/Dialect/XeGPU/ops.mlir
Outdated
// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>) | ||
gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, %arg1: vector<16x16xf16>) { | ||
// CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> | ||
xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the format, would this alternative looks better ?
!xegpu.matrix_desc<16x64xf16, strides=[1, 16], blocksize=[16, 16]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This format needs two attribute fields. It is a little bit hard to do the extension in downstream. I create a MemLayoutAttr
to encode them, with format as !xegpu.matrix_desc<8x16xf16, #xegpu.mem_layout<stride = [64, 1]>, block = [8, 8]>
, is it good to you?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
overall looks good. leave some minor comments
A high-level question regarding naming: why a "matrix" descriptor? AFAIK, it's specifically limited to SLM only as a storage and supports nD layouts. |
let summary = "MatrixDesc describing the data in SLM"; | ||
let description = [{ | ||
MatrixDesc represents a block of data stored in shared local memory. | ||
By default, unless a layout attribute is provided, the data is stored |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 to the questions here. It just reads as a MemRef layout which is fine but could use explicit clarification as layout
term becomes quite overloaded within the dialect now.
Could you add more description here? Or at least an example snippet.
[Pure, ViewLikeOpInterface, AllElementTypesMatch<["src", "res"]>]> { | ||
let description = [{ | ||
Creates a subview of a matrix descriptor. The resulting matrix descriptor | ||
may have a lower rank than the source, in which case the dimensions are left-aligned. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does left-aligned
mean?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
emm, a bad description here. I tweaked it a little bit. It means for lower rank result, its dims are mapped the high-order dims of the source. e.g., src: matrix_desc<8x64xf32>, res: matrix<16xf32> (offset [8, 8]), result is taken from the 2nd dimension of the src. it is similar to vector.extract op.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added some examples for MatrixDesc in the description.
|
||
let parameters = (ins "DictionaryAttr": $attrs); | ||
let hasCustomAssemblyFormat = 1; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: remove extra line
identified by `mem_desc`. | ||
|
||
Arguments: | ||
- `mem_desc`: the matrix descriptor specifying the SLM region. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: matrix -> memory
using the provided matrix descriptor. | ||
|
||
Arguments: | ||
- `mem_desc`: the matrix descriptor identifying the SLM region. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: matrix -> memory
let description = [{ | ||
Creates a matrix descriptor from a shared local memory (SLM) buffer. | ||
The resulting matrix descriptor has to have the same size as the underlying | ||
shared local memory. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
shared local memory => memory. The memory descriptor itself doesn't have to be associated with shared local memory.
def XeGPU_MemDesc: XeGPUTypeDef<"MemDesc", "mem_desc", [ShapedTypeInterface], "mlir::Type"> { | ||
let summary = "MemDesc describing the data in SLM"; | ||
let description = [{ | ||
MemDesc represents a block of data stored in shared local memory. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
consider "a block of data" => "multi-dimensional array buffer". MemDesc associates "structure" information to the buffer (block of data) so it can be viewed as multi-dimension array buffer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This PR adds the definition of
MatrixDesc
, a type that represents a block of data stored in shared local memory, and related load/store operations for Intel Xe GPUs.